Geographic Maps with R

2023-06-09

Setup

Code
library(ggplot2)
library(tidyverse)
library(plotly)

# for map definitions and manipulations
library(sf)
library("rnaturalearth")
#library("rnaturalearthdata")
library(maps)

library(gapminder)     # for some world data
library(RColorBrewer)  # for color maps

Maps with ggplot2

Maps are polygons and can be plotted using ggplot2::geom_map().

Below I illustrate this with

  1. a simple map of the world and
  2. a more customized version of Italy.
Code
# World Map
world_map <- ggplot2::map_data('world')
world_map %>% head(3)
       long      lat group order region subregion
1 -69.89912 12.45200     1     1  Aruba      <NA>
2 -69.89571 12.42300     1     2  Aruba      <NA>
3 -69.94219 12.43853     1     3  Aruba      <NA>
Code
p = ggplot(data=world_map)
p + geom_map(map=world_map, aes(map_id=region, fill=region)) + 
  expand_limits(x = world_map$long, y = world_map$lat) +
  theme(legend.position = "none") +
  ggtitle('World with default colours')

Code
# Italy map with Pastel colours
italy_map = map_data("italy")
italy_map %>% head(3)
      long      lat group order        region subregion
1 11.83295 46.50011     1     1 Bolzano-Bozen      <NA>
2 11.81089 46.52784     1     2 Bolzano-Bozen      <NA>
3 11.73068 46.51890     1     3 Bolzano-Bozen      <NA>
Code
# define a custom colour palette and recycle the colours
palette <- brewer.pal(8, "Dark2")
my_palette <- rep(palette, length.out = 100)

ggplot(data=italy_map) +
  geom_map(map=italy_map, aes(map_id=region, fill=region)) + 
  expand_limits(x = italy_map$long, y = italy_map$lat) +
  scale_fill_manual(values = my_palette) +
  theme(legend.position = "none") + 
  ggtitle('Italy with custom colours')

Overlap with Data

Projecting external data onto geographic maps is easy. We just need to make sure that the data contains a variable (map_id) that corresponds to the region variable in the map.

First we show this for a standard data set that comes with R: USArrests

Code
# map of US states as polygon
states_map <- ggplot2::map_data("state")

# simple map of regions
p <- ggplot(data=states_map) + 
  geom_map(map=states_map, aes(map_id=region, fill=region)) +
  expand_limits(x = states_map$long, y = states_map$lat) +
  ggtitle('US states with colour-coded regions') +
  theme(legend.position = "none")
p

Code
# USA crime data (match 'state' format to 'region' definition in map)
d = USArrests  %>% mutate(state=tolower(rownames(.)))

d %>% head(3)
        Murder Assault UrbanPop Rape   state
Alabama   13.2     236       58 21.2 alabama
Alaska    10.0     263       48 44.5  alaska
Arizona    8.1     294       80 31.0 arizona
Code
states_map %>% head(3)
       long      lat group order  region subregion
1 -87.46201 30.38968     1     1 alabama      <NA>
2 -87.48493 30.37249     1     2 alabama      <NA>
3 -87.52503 30.37249     1     3 alabama      <NA>
Code
# link map with data, by  map_id = state = region
p <- ggplot(d) +
  geom_map(map = states_map, aes(map_id = state, fill = Murder) ) + 
  expand_limits(x = states_map$long, y = states_map$lat) +
  ggtitle('US states with colour-coded regions by data values')
p

More Examples

Next I repeat the same for other data sets: gapminder and a custom data set with emigration data from an Excel file.

Notice that some countries are missing because map by name failed (e.g. USA). This should be fixed, but I leave it here to highlight the challenge.

Code
# gapminder data
ggplot(data=gapminder) +
  geom_map(map=world_map, 
           aes(fill=lifeExp, map_id=country), colour = "#7f7f7f") +
  expand_limits(x = world_map$long, y = world_map$lat) +
  ggtitle('world_map with gapminder(lifeExp)')

Code
# read emigration data from Excel file
fn = "data/20200501_emigrant_remittance_data_visualization_raw.xlsx"
emigration_data = readxl::read_excel(path = fn)

#simplify the column names
colnames(emigration_data)=c("country", "Pop2019", "Emi2019", "Share_t", "Share_i", "Remit2019_V", "Remit2019_GDP")

# using the orginal world_map (long, lat, region) and data emigration_data
# where emigration_data$country is used to map to world_map$region

ggplot(data=emigration_data) +
  geom_map(map=world_map, 
           aes(fill=Emi2019, map_id=country), colour = "#7f7f7f") +
  expand_limits(x = world_map$long, y = world_map$lat) +
  ggtitle('world_map with own data(Emi2019)')

Cooordinates: fine tuning

There are various coordinate systems. We can use coord_sf() to make specific choices for beautification.

Here I revisit the plot of USA murder statistics from above

Code
# crs = 5070 is a Conus Albers projection for North America, see: https://epsg.io/5070
# default_crs = 4326 tells coord_sf() that the input map data are in longitude-latitude format

coords <- ggplot2::coord_sf(
  crs = 5070, default_crs = 4326,
  xlim = c(-125, -70), ylim = c(25, 52)
  )

# p was the defined above
p + coords

Multiple Maps

Using a long data format and facet_wrap we can also plot multiple maps.

Code
d_long <- d %>% pivot_longer(Murder:Rape)
d_long %>% head(3)
# A tibble: 3 × 3
  state   name     value
  <chr>   <chr>    <dbl>
1 alabama Murder    13.2
2 alabama Assault  236  
3 alabama UrbanPop  58  
Code
ggplot(d_long) +
  geom_map(map = states_map, aes(map_id = state, fill = value) ) +
  coords + 
  facet_wrap(~name) 

sf format and geom_sf

sf is a common data standard for simple feature objects such as geographical data.

The sf package provides many tools for reading and transforming sf data.

ggplot2 provides a convenient interface geom_sf to plot such data.

Code
nc <- sf::st_read(system.file("shape/nc.shp", package = "sf"), quiet = TRUE)
glimpse(nc)
Rows: 100
Columns: 15
$ AREA      <dbl> 0.114, 0.061, 0.143, 0.070, 0.153, 0.097, 0.062, 0.091, 0.11…
$ PERIMETER <dbl> 1.442, 1.231, 1.630, 2.968, 2.206, 1.670, 1.547, 1.284, 1.42…
$ CNTY_     <dbl> 1825, 1827, 1828, 1831, 1832, 1833, 1834, 1835, 1836, 1837, …
$ CNTY_ID   <dbl> 1825, 1827, 1828, 1831, 1832, 1833, 1834, 1835, 1836, 1837, …
$ NAME      <chr> "Ashe", "Alleghany", "Surry", "Currituck", "Northampton", "H…
$ FIPS      <chr> "37009", "37005", "37171", "37053", "37131", "37091", "37029…
$ FIPSNO    <dbl> 37009, 37005, 37171, 37053, 37131, 37091, 37029, 37073, 3718…
$ CRESS_ID  <int> 5, 3, 86, 27, 66, 46, 15, 37, 93, 85, 17, 79, 39, 73, 91, 42…
$ BIR74     <dbl> 1091, 487, 3188, 508, 1421, 1452, 286, 420, 968, 1612, 1035,…
$ SID74     <dbl> 1, 0, 5, 1, 9, 7, 0, 0, 4, 1, 2, 16, 4, 4, 4, 18, 3, 4, 1, 1…
$ NWBIR74   <dbl> 10, 10, 208, 123, 1066, 954, 115, 254, 748, 160, 550, 1243, …
$ BIR79     <dbl> 1364, 542, 3616, 830, 1606, 1838, 350, 594, 1190, 2038, 1253…
$ SID79     <dbl> 0, 3, 6, 2, 3, 5, 2, 2, 2, 5, 2, 5, 4, 4, 6, 17, 4, 7, 1, 0,…
$ NWBIR79   <dbl> 19, 12, 260, 145, 1197, 1237, 139, 371, 844, 176, 597, 1369,…
$ geometry  <MULTIPOLYGON [°]> MULTIPOLYGON (((-81.47276 3..., MULTIPOLYGON ((…
Code
ggplot(nc) + geom_sf(aes(fill = AREA))

Code
nc_3857 <- sf::st_transform(nc, 3857)
ggplot(nc_3857) + geom_sf(aes(fill = PERIMETER))

Maps from rnaturalearth

There are additional libraries to retrieve Geo-coordinates at various resolutions and formats.

Code
# a world map from the rnaturalearth library in sf format
# this includes a geometry field per region (defined as MULTIPOLYGON)
# notice that 'world' also contains additional data such as population 'pop_est'
world <- rnaturalearth::ne_countries(scale = "medium", returnclass = "sf")
world %>% glimpse()
Rows: 241
Columns: 64
$ scalerank  <int> 3, 1, 1, 1, 1, 3, 3, 1, 1, 1, 3, 1, 5, 3, 1, 1, 1, 1, 1, 1,…
$ featurecla <chr> "Admin-0 country", "Admin-0 country", "Admin-0 country", "A…
$ labelrank  <dbl> 5, 3, 3, 6, 6, 6, 6, 4, 2, 6, 4, 4, 5, 6, 6, 2, 4, 5, 6, 2,…
$ sovereignt <chr> "Netherlands", "Afghanistan", "Angola", "United Kingdom", "…
$ sov_a3     <chr> "NL1", "AFG", "AGO", "GB1", "ALB", "FI1", "AND", "ARE", "AR…
$ adm0_dif   <dbl> 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0,…
$ level      <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,…
$ type       <chr> "Country", "Sovereign country", "Sovereign country", "Depen…
$ admin      <chr> "Aruba", "Afghanistan", "Angola", "Anguilla", "Albania", "A…
$ adm0_a3    <chr> "ABW", "AFG", "AGO", "AIA", "ALB", "ALD", "AND", "ARE", "AR…
$ geou_dif   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ geounit    <chr> "Aruba", "Afghanistan", "Angola", "Anguilla", "Albania", "A…
$ gu_a3      <chr> "ABW", "AFG", "AGO", "AIA", "ALB", "ALD", "AND", "ARE", "AR…
$ su_dif     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ subunit    <chr> "Aruba", "Afghanistan", "Angola", "Anguilla", "Albania", "A…
$ su_a3      <chr> "ABW", "AFG", "AGO", "AIA", "ALB", "ALD", "AND", "ARE", "AR…
$ brk_diff   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ name       <chr> "Aruba", "Afghanistan", "Angola", "Anguilla", "Albania", "A…
$ name_long  <chr> "Aruba", "Afghanistan", "Angola", "Anguilla", "Albania", "A…
$ brk_a3     <chr> "ABW", "AFG", "AGO", "AIA", "ALB", "ALD", "AND", "ARE", "AR…
$ brk_name   <chr> "Aruba", "Afghanistan", "Angola", "Anguilla", "Albania", "A…
$ brk_group  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ abbrev     <chr> "Aruba", "Afg.", "Ang.", "Ang.", "Alb.", "Aland", "And.", "…
$ postal     <chr> "AW", "AF", "AO", "AI", "AL", "AI", "AND", "AE", "AR", "ARM…
$ formal_en  <chr> "Aruba", "Islamic State of Afghanistan", "People's Republic…
$ formal_fr  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ note_adm0  <chr> "Neth.", NA, NA, "U.K.", NA, "Fin.", NA, NA, NA, NA, "U.S.A…
$ note_brk   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Multiple claim…
$ name_sort  <chr> "Aruba", "Afghanistan", "Angola", "Anguilla", "Albania", "A…
$ name_alt   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ mapcolor7  <dbl> 4, 5, 3, 6, 1, 4, 1, 2, 3, 3, 4, 4, 1, 7, 2, 1, 3, 1, 2, 3,…
$ mapcolor8  <dbl> 2, 6, 2, 6, 4, 1, 4, 1, 1, 1, 5, 5, 2, 5, 2, 2, 1, 6, 2, 2,…
$ mapcolor9  <dbl> 2, 8, 6, 6, 1, 4, 1, 3, 3, 2, 1, 1, 2, 9, 5, 2, 3, 5, 5, 1,…
$ mapcolor13 <dbl> 9, 7, 1, 3, 6, 6, 8, 3, 13, 10, 1, NA, 7, 11, 5, 7, 4, 8, 8…
$ pop_est    <dbl> 103065, 28400000, 12799293, 14436, 3639453, 27153, 83888, 4…
$ gdp_md_est <dbl> 2258.0, 22270.0, 110300.0, 108.9, 21810.0, 1563.0, 3660.0, …
$ pop_year   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ lastcensus <dbl> 2010, 1979, 1970, NA, 2001, NA, 1989, 2010, 2010, 2001, 201…
$ gdp_year   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ economy    <chr> "6. Developing region", "7. Least developed region", "7. Le…
$ income_grp <chr> "2. High income: nonOECD", "5. Low income", "3. Upper middl…
$ wikipedia  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fips_10    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ iso_a2     <chr> "AW", "AF", "AO", "AI", "AL", "AX", "AD", "AE", "AR", "AM",…
$ iso_a3     <chr> "ABW", "AFG", "AGO", "AIA", "ALB", "ALA", "AND", "ARE", "AR…
$ iso_n3     <chr> "533", "004", "024", "660", "008", "248", "020", "784", "03…
$ un_a3      <chr> "533", "004", "024", "660", "008", "248", "020", "784", "03…
$ wb_a2      <chr> "AW", "AF", "AO", NA, "AL", NA, "AD", "AE", "AR", "AM", "AS…
$ wb_a3      <chr> "ABW", "AFG", "AGO", NA, "ALB", NA, "ADO", "ARE", "ARG", "A…
$ woe_id     <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ adm0_a3_is <chr> "ABW", "AFG", "AGO", "AIA", "ALB", "ALA", "AND", "ARE", "AR…
$ adm0_a3_us <chr> "ABW", "AFG", "AGO", "AIA", "ALB", "ALD", "AND", "ARE", "AR…
$ adm0_a3_un <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ adm0_a3_wb <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ continent  <chr> "North America", "Asia", "Africa", "North America", "Europe…
$ region_un  <chr> "Americas", "Asia", "Africa", "Americas", "Europe", "Europe…
$ subregion  <chr> "Caribbean", "Southern Asia", "Middle Africa", "Caribbean",…
$ region_wb  <chr> "Latin America & Caribbean", "South Asia", "Sub-Saharan Afr…
$ name_len   <dbl> 5, 11, 6, 8, 7, 5, 7, 20, 9, 7, 14, 10, 23, 22, 17, 9, 7, 1…
$ long_len   <dbl> 5, 11, 6, 8, 7, 13, 7, 20, 9, 7, 14, 10, 27, 35, 19, 9, 7, …
$ abbrev_len <dbl> 5, 4, 4, 4, 4, 5, 4, 6, 4, 4, 9, 4, 7, 10, 6, 4, 5, 4, 4, 5…
$ tiny       <dbl> 4, NA, NA, NA, NA, 5, 5, NA, NA, NA, 3, NA, NA, 2, 4, NA, N…
$ homepart   <dbl> NA, 1, 1, NA, 1, NA, 1, 1, 1, 1, NA, 1, NA, NA, 1, 1, 1, 1,…
$ geometry   <MULTIPOLYGON [°]> MULTIPOLYGON (((-69.89912 1..., MULTIPOLYGON (…
Code
p = ggplot(data = world) +
  geom_sf(aes(fill = log10(pop_est), ids=name)) +
  scale_fill_gradient2(low="white", high="red", midpoint=6)
p
Code
ggsave('maps.png', plot=p)  # thumbnail for blog

Adding new data

We can add new data to the sf object …

Code
# here we join additional data into the world map
D=world %>% inner_join(gapminder, by=c("name"="country"))

# D now contains coordinate `geometry` from world and 
# additional data from gapminder - we plot it again with geom_sf()

p <- D %>% 
  # keep plot size manageable
  filter(year>=1992) %>%                                         
  ggplot(data = .) +
  # add labels to be shown in plotly hover
  geom_sf(aes(fill=log10(pop),label=name, label1=year)) +
  # custom colour scheme
  scale_fill_gradient2(low="white", high="red", midpoint=6) +
  # default map
  coord_sf(crs = st_crs("+proj=longlat +datum=WGS84")) +         
  # Mercator map
  #  coord_sf(crs = st_crs(3857)) +    
  # separate plot for each year
  facet_wrap(~year) + 
  ggtitle('Map of world with gapminder data')

#p

# use plotly for more interactivity - just because we can
ggplotly(p)

There is more

Maps with maps

Code
maps::map()         # map of workd

Code
maps::map("state")  # map of US states

Code
# pacific-centered map of the world
maps::map(wrap = c(0,360), fill = TRUE, col = "lightblue") 

Map Manipulation with maps and sf

Code
# convert map object (list) to sf object (data frame)
counties <- st_as_sf(map("county", plot = FALSE, fill = TRUE))
#states <- st_as_sf(map("state", plot = FALSE, fill = TRUE)) # unused here

# subset US counties in Florida and check if polygon is valid (not for all!)
counties <- counties %>% 
  subset(grepl("florida", counties$ID)) %>% 
  filter(st_is_valid(.))

# Calculate the county area
counties$area <- as.numeric(st_area(counties))

# ggplot with geom_sf() - a more modern version of geom_map
ggplot(data = world) +
    # first show map and borders beyond Florida counties
    geom_sf() + 
    # now overlay the filled counties of Florida
    geom_sf(data = counties, aes(fill = area)) +
    # chose colour scheme
    scale_fill_viridis_c(trans = "sqrt", alpha = .4) +
    # define coordinates
    coord_sf(xlim = c(-88, -78), ylim = c(24.5, 33), expand = FALSE)

And there is more …